{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE TupleSections #-}

{-
    BNF Converter: C flex generator
    Copyright (C) 2004  Author:  Michael Pellauer

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
-}

{-
   **************************************************************
    BNF Converter Module

    Description   : This module generates the Flex file. It is
                    similar to JLex but with a few peculiarities.

    Author        : Michael Pellauer (pellauer@cs.chalmers.se)

    License       : GPL (GNU General Public License)

    Created       : 5 August, 2003

    Modified      : 10 August, 2003


   **************************************************************
-}
module BNFC.Backend.C.CFtoFlexC (cf2flex, lexComments, cMacros, commentStates) where

import Prelude'
import Data.Bifunctor (first)
import Data.List  (isInfixOf)
import Data.Maybe (fromMaybe)
import qualified Data.Map as Map

import BNFC.CF
import BNFC.Backend.C.RegToFlex
import BNFC.Backend.Common.NamedVariables
import BNFC.PrettyPrint
import BNFC.Utils (cstring, unless)

-- | Entrypoint.
cf2flex :: String -> CF -> (String, SymMap) -- The environment is reused by the parser.
cf2flex name cf = (, env) $ unlines
    [ prelude name
    , cMacros cf
    , lexSymbols env0
    , restOfFlex cf env
    ]
  where
    env  = Map.fromList env1
    env0 = makeSymEnv (cfgSymbols cf ++ reservedWords cf) [0 :: Int ..]
    env1 = map (first Keyword )env0 ++ makeSymEnv (map Tokentype $ tokenNames cf) [length env0 ..]
    makeSymEnv = zipWith $ \ s n -> (s, "_SYMB_" ++ show n)

prelude :: String -> String
prelude name = unlines
  [
   "/* -*- c -*- This FLex file was machine-generated by the BNF converter */",
   -- noinput and nounput are most often unused
   -- https://stackoverflow.com/questions/39075510/option-noinput-nounput-what-are-they-for
   "%option noyywrap noinput nounput",
   "%{",
   "#define yylval " ++ name ++ "lval",
   "#define yylloc " ++ name ++ "lloc",
   "#define YY_BUFFER_APPEND " ++ name ++ "_BUFFER_APPEND",
   "#define YY_BUFFER_RESET " ++ name ++ "_BUFFER_RESET",
   "#define init_lexer " ++ name ++ "_init_lexer",
   "#include <string.h>",
   "#include \"Parser.h\"",
   "#define YY_BUFFER_LENGTH 4096",
   "char YY_PARSED_STRING[YY_BUFFER_LENGTH];",
   "void YY_BUFFER_APPEND(char *s)",
   "{",
   "  strcat(YY_PARSED_STRING, s); /* Do something better here! */",
   "}",
   "void YY_BUFFER_RESET(void)",
   "{",
   "  int x;",
   "  for(x = 0; x < YY_BUFFER_LENGTH; x++)",
   "    YY_PARSED_STRING[x] = 0;",
   "}",
   -- https://www.gnu.org/software/bison/manual/html_node/Token-Locations.html
   -- Flex is responsible for keeping tracking of the yylloc for Bison.
   -- Flex also doesn't do this automatically so we need this function
   -- https://stackoverflow.com/a/22125500/425756
   "static void update_loc(YYLTYPE* loc, char* text)",
   "{",
   "  loc->first_line = loc->last_line;",
   "  loc->first_column = loc->last_column;",
   "  int i = 0;",  -- put this here as @for (int i...)@ is only allowed in C99
   "  for (; text[i] != '\\0'; ++i) {",
   "      if (text[i] == '\\n') {",
   "          ++loc->last_line;",
   "          loc->last_column = 0; ",
   "      } else {",
   "          ++loc->last_column; ",
   "      }",
   "  }",
   "}",
   "#define YY_USER_ACTION update_loc(&yylloc, yytext);",
   "",
   "%}"
  ]

-- For now all categories are included.
-- Optimally only the ones that are used should be generated.
cMacros :: CF ->  String
cMacros cf = unlines
  [ "LETTER [a-zA-Z]"
  , "CAPITAL [A-Z]"
  , "SMALL [a-z]"
  , "DIGIT [0-9]"
  , "IDENT [a-zA-Z0-9'_]"
  , unwords $ concat
      [ [ "%START YYINITIAL CHAR CHARESC CHAREND STRING ESCAPED" ]
      , take (numberOfBlockCommentForms cf) commentStates
      ]
  , "%%"
  ]

lexSymbols :: KeywordEnv -> String
lexSymbols ss = concatMap transSym ss
  where
    transSym (s,r) =
      "<YYINITIAL>\"" ++ s' ++ "\"      \t return " ++ r ++ ";\n"
        where
         s' = escapeChars s

restOfFlex :: CF -> SymMap -> String
restOfFlex cf env = unlines $ concat
  [ [ render $ lexComments Nothing (comments cf)
    , ""
    ]
  , userDefTokens
  , ifC catString  strStates
  , ifC catChar    chStates
  , ifC catDouble  [ "<YYINITIAL>{DIGIT}+\".\"{DIGIT}+(\"e\"(\\-)?{DIGIT}+)?      \t yylval._double = atof(yytext); return _DOUBLE_;" ]
  , ifC catInteger [ "<YYINITIAL>{DIGIT}+      \t yylval._int = atoi(yytext); return _INTEGER_;" ]
  , ifC catIdent   [ "<YYINITIAL>{LETTER}{IDENT}*      \t yylval._string = strdup(yytext); return _IDENT_;" ]
  , [ "<YYINITIAL>[ \\t\\r\\n\\f]      \t /* ignore white space. */;"
    , "<YYINITIAL>.      \t return _ERROR_;"
    , "%%"
    ]
  , footer
  ]
  where
  ifC cat s = if isUsedCat cf (TokenCat cat) then s else []
  userDefTokens =
    [ "<YYINITIAL>" ++ printRegFlex exp ++
       "    \t yylval._string = strdup(yytext); return " ++ sName name ++ ";"
    | (name, exp) <- tokenPragmas cf
    ]
    where sName n = fromMaybe n $ Map.lookup (Tokentype n) env
  strStates =  --These handle escaped characters in Strings.
    [ "<YYINITIAL>\"\\\"\"      \t BEGIN STRING;"
    , "<STRING>\\\\      \t BEGIN ESCAPED;"
    , "<STRING>\\\"      \t yylval._string = strdup(YY_PARSED_STRING); YY_BUFFER_RESET(); BEGIN YYINITIAL; return _STRING_;"
    , "<STRING>.      \t YY_BUFFER_APPEND(yytext);"
    , "<ESCAPED>n      \t YY_BUFFER_APPEND(\"\\n\"); BEGIN STRING;"
    , "<ESCAPED>\\\"      \t YY_BUFFER_APPEND(\"\\\"\"); BEGIN STRING ;"
    , "<ESCAPED>\\\\      \t YY_BUFFER_APPEND(\"\\\\\"); BEGIN STRING;"
    , "<ESCAPED>t       \t YY_BUFFER_APPEND(\"\\t\"); BEGIN STRING;"
    , "<ESCAPED>.       \t YY_BUFFER_APPEND(yytext); BEGIN STRING;"
    ]
  chStates =  --These handle escaped characters in Chars.
    [ "<YYINITIAL>\"'\" \tBEGIN CHAR;"
    , "<CHAR>\\\\      \t BEGIN CHARESC;"
    , "<CHAR>[^']      \t BEGIN CHAREND; yylval._char = yytext[0]; return _CHAR_;"
    , "<CHARESC>n      \t BEGIN CHAREND; yylval._char = '\\n'; return _CHAR_;"
    , "<CHARESC>t      \t BEGIN CHAREND; yylval._char = '\\t'; return _CHAR_;"
    , "<CHARESC>.      \t BEGIN CHAREND; yylval._char = yytext[0]; return _CHAR_;"
    , "<CHAREND>\"'\"      \t BEGIN YYINITIAL;"
    ]
  footer =
    [
     "void init_lexer(FILE *inp)",
     "{",
     "  yyrestart(inp);",
     "  yylloc.first_line   = 1;",
     "  yylloc.first_column = 1;",
     "  yylloc.last_line    = 1;",
     "  yylloc.last_column  = 1;",
     "  BEGIN YYINITIAL;",
     "}"
    ]

-- ---------------------------------------------------------------------------
-- Comments

-- | Create flex rules for single-line and multi-lines comments.
-- The first argument is an optional namespace (for C++); the second
-- argument is the set of comment delimiters as returned by BNFC.CF.comments.
--
-- This function is only compiling the results of applying either
-- lexSingleComment or lexMultiComment on each comment delimiter or pair of
-- delimiters.
--
-- >>> lexComments (Just "myns.") ([("{-","-}")],["--"])
-- <YYINITIAL>"--"[^\n]* /* skip */; /* BNFC: comment "--" */
-- <YYINITIAL>"{-" BEGIN COMMENT; /* BNFC: block comment "{-" "-}" */
-- <COMMENT>"-}" BEGIN YYINITIAL;
-- <COMMENT>.    /* skip */;
-- <COMMENT>[\n] /* skip */;
lexComments :: Maybe String -> ([(String, String)], [String]) -> Doc
lexComments _ (m,s) = vcat $ concat
  [ map    lexSingleComment s
  , zipWith lexMultiComment m commentStates
  ]

-- | If we have several block comments, we need different COMMENT lexing states.
commentStates :: [String]
commentStates = map ("COMMENT" ++) $ "" : map show [1..]

-- | Create a lexer rule for single-line comments.
-- The first argument is -- an optional c++ namespace
-- The second argument is the delimiter that marks the beginning of the
-- comment.
--
-- >>> lexSingleComment "--"
-- <YYINITIAL>"--"[^\n]* /* skip */; /* BNFC: comment "--" */
--
-- >>> lexSingleComment "\""
-- <YYINITIAL>"\""[^\n]* /* skip */; /* BNFC: comment "\"" */
lexSingleComment :: String -> Doc
lexSingleComment c =
    "<YYINITIAL>" <> cstring c <> "[^\\n]*"
    <+> "/* skip */;"
    <+> unless (containsCCommentMarker c) ("/* BNFC: comment" <+> cstring c <+> "*/")

containsCCommentMarker :: String -> Bool
containsCCommentMarker s = "/*" `isInfixOf` s || "*/" `isInfixOf` s

-- | Create a lexer rule for multi-lines comments.
-- The first argument is -- an optional c++ namespace
-- The second arguments is the pair of delimiter for the multi-lines comment:
-- start deleminiter and end delimiter.
-- There might be a possible bug here if a language includes 2 multi-line
-- comments. They could possibly start a comment with one character and end it
-- with another.  However this seems rare.
--
-- >>> lexMultiComment ("{-", "-}") "COMMENT"
-- <YYINITIAL>"{-" BEGIN COMMENT; /* BNFC: block comment "{-" "-}" */
-- <COMMENT>"-}" BEGIN YYINITIAL;
-- <COMMENT>.    /* skip */;
-- <COMMENT>[\n] /* skip */;
--
-- >>> lexMultiComment ("\"'", "'\"") "COMMENT"
-- <YYINITIAL>"\"'" BEGIN COMMENT; /* BNFC: block comment "\"'" "'\"" */
-- <COMMENT>"'\"" BEGIN YYINITIAL;
-- <COMMENT>.    /* skip */;
-- <COMMENT>[\n] /* skip */;
lexMultiComment :: (String, String) -> String -> Doc
lexMultiComment (b,e) comment = vcat
    [ "<YYINITIAL>" <> cstring b <+> "BEGIN" <+> text comment <> ";"
      <+> unless (containsCCommentMarker b || containsCCommentMarker e)
          ("/* BNFC: block comment" <+> cstring b <+> cstring e <+> "*/")
    , commentTag <> cstring e <+> "BEGIN YYINITIAL;"
    , commentTag <> ".    /* skip */;"
    , commentTag <> "[\\n] /* skip */;"
    ]
  where
  commentTag = text $ "<" ++ comment ++ ">"

-- | Helper function that escapes characters in strings.
escapeChars :: String -> String
escapeChars [] = []
escapeChars ('\\':xs) = '\\' : ('\\' : (escapeChars xs))
escapeChars ('\"':xs) = '\\' : ('\"' : (escapeChars xs))
escapeChars (x:xs) = x : (escapeChars xs)
